services:
  chain-server:
    container_name: chain-server
    image: chain-server:latest
    build:
      context: ../../
      dockerfile: ./RetrievalAugmentedGeneration/Dockerfile
      args:
        EXAMPLE_NAME: query_decomposition_rag
    command: --port 8081 --host 0.0.0.0
    environment:
      APP_VECTORSTORE_URL: "http://milvus:19530"
      APP_VECTORSTORE_NAME: "milvus"
      APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-ai-llama2-70b}
      APP_LLM_MODELENGINE: nvidia-ai-endpoints
      APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""}
      APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-ai-embed-qa-4}
      APP_EMBEDDINGS_MODELENGINE: nvidia-ai-endpoints
      APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL:-""}
      APP_TEXTSPLITTER_MODELNAME: WhereIsAI/UAE-Large-V1
      APP_TEXTSPLITTER_CHUNKSIZE: 510
      APP_TEXTSPLITTER_CHUNKOVERLAP: 200
      NVIDIA_API_KEY: ${NVIDIA_API_KEY}
      APP_PROMPTS_CHATTEMPLATE: "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Please ensure that your responses are positive in nature."
      APP_PROMPTS_RAGTEMPLATE: "You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user."
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-password}
      POSTGRES_USER: ${POSTGRES_USER:-postgres}
      POSTGRES_DB: ${POSTGRES_DB:-api}
      APP_RETRIEVER_TOPK: 4
      APP_RETRIEVER_SCORETHRESHOLD: 0.25
      COLLECTION_NAME: query_decomposition
      OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
      OTEL_EXPORTER_OTLP_PROTOCOL: grpc
      ENABLE_TRACING: false
    ports:
    - "8081:8081"
    expose:
    - "8081"
    shm_size: 5gb
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  rag-playground:
    container_name: rag-playground
    image: rag-playground:latest
    build:
      context: ../.././RetrievalAugmentedGeneration/frontend/
      dockerfile: Dockerfile
    command: --port 8090
    environment:
      APP_SERVERURL: http://chain-server
      APP_SERVERPORT: 8081
      APP_MODELNAME: ${APP_LLM_MODELNAME:-ai-llama2-70b}
      RIVA_API_URI: ${RIVA_API_URI:-}
      RIVA_API_KEY: ${RIVA_API_KEY:-}
      RIVA_FUNCTION_ID: ${RIVA_FUNCTION_ID:-}
      TTS_SAMPLE_RATE: ${TTS_SAMPLE_RATE:-48000}
      OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
      OTEL_EXPORTER_OTLP_PROTOCOL: grpc
      ENABLE_TRACING: false
    ports:
    - "8090:8090"
    expose:
    - "8090"
    depends_on:
    - chain-server

networks:
  default:
    name: nvidia-rag
